############################################################################
#######################     function definitions     #######################
############################################################################
library(amap)
# Add vertical error bars to the current plot.
#
# Arguments:
#   x, y    coordinates of the bar centres (e.g. the midpoints returned by
#           barplot() and the bar heights).
#   upper   distance from y to the top cap of each error bar.
#   lower   distance from y to the bottom cap; defaults to `upper`
#           (symmetric bars).
#   length  cap width, forwarded to arrows() as its `length` argument.
#   ...     further arguments forwarded to arrows() (col, lwd, ...).
#
# Stops with "vectors must be same length" unless x, y, upper and lower all
# have the same length. Returns whatever arrows() returns.
error.bar <- function(x, y, upper, lower = upper, length = 0.1, ...) {
  # `length` the argument shadows only the variable; the calls below still
  # resolve to base::length() because R looks up a function in call position.
  n <- length(x)
  # Scalar comparisons in an `if` belong with the short-circuit `||`.
  if (length(y) != n || length(lower) != n || length(upper) != n) {
    stop("vectors must be same length")
  }
  # angle = 90 with code = 3 draws a flat cap at both ends of each segment.
  arrows(x, y + upper, x, y - lower, angle = 90, code = 3, length = length, ...)
}

# Draw one bar chart per gene on the current graphics device.
#
# Each chart shows the gene's value at every entry of `n_time_point`, with
# error bars of 1.96 * sd / sqrt(n) (the usual normal-approximation 95%
# half-width).
#
# Arguments:
#   genes  character vector of row names to plot; one plot is drawn per gene.
#   cols   bar colour(s), passed to barplot() as `col`.
#   n_rep  number of replicates behind each time point, in `n_time_point`
#          order. Either a single value or one value per time point. The
#          default matches the replicate layout used by this script.
#
# Reads these script-level objects: `nData` (per-time-point means),
# `n_log_sd` (per-time-point standard deviations), `n_time_point` (column
# names) and `n_label` (axis labels). Called for its plotting side effect.
NaiveGeneBarplot <- function(genes, cols, n_rep = c(2, 2, 2, 2, 3, 2, 3, 3, 2, 2, 2)) {
  # The replicate counts do not depend on the gene, so validate them once
  # instead of rebuilding them on every iteration.
  if (length(n_rep) != 1 && length(n_rep) != length(n_time_point)) {
    stop("n_rep must have length 1 or one entry per time point")
  }
  for (gene in genes) {
    expr_mean <- nData[gene, n_time_point]
    expr_sd <- n_log_sd[gene, n_time_point]
    y_top <- max(expr_mean)
    y_bottom <- min(expr_mean)
    # 40% headroom above the tallest bar leaves space for the error bars; the
    # lower limit only drops below zero when a value is negative.
    bar_mid <- barplot(expr_mean, col = cols, main = gene, ylab = "log2(fpkm+1)",
                       names.arg = n_label, las = 2, bty = "l", border = NA,
                       ylim = c(min(y_bottom * 1.4, 0), y_top * 1.4))
    error.bar(bar_mid, expr_mean, 1.96 * expr_sd / sqrt(n_rep),
              col = "black", length = 0.02, lwd = 0.8)
  }
}

############################################################################
#########################	   read in data       ##########################
############################################################################

############ 2nd naive RNAseq
# Replicate-level table. The values are treated as log2(FPKM + 1): they are
# back-transformed with 2^x - 1 below.
logfpkm2nd <- read.table("../data/2nd.reprogramming.lg2.all.fpkm.txt", header = TRUE, row.names = 1)
# Replicate columns to use, grouped by time point ("<time point>_r<k>").
n_path <- c("hiF_r1","hiF_r2","he0_r1","he0_r2","he2_r1","he2_r2","he6_r1","he6_r2","n8_r1","n8_r2","n8_r3","n12_r1","n12_r2","n14_r1","n14_r2","n14_r3","n20_r1","n20_r2","n20_r3","n24p_r1","n24p_r2","n24m_r1","n24m_r2","niPS_r1","niPS_r2")
nData_tmp <- logfpkm2nd[, n_path]
nfpkm2nd <- 2^nData_tmp - 1

n_time_point <- c("hiF","he0","he2","he6","n8","n12","n14","n20","n24p","n24m","niPS")
n_label <- c("hiF-T","0d","2d","6d","8d","12d","14d","20d","24d+dox","24d-dox","niPSC-T")

# Average the replicates of every time point on the linear FPKM scale.
# Replicates are matched to their time point by stripping the "_r<k>" suffix
# from the column name, so the grouping cannot drift out of sync with n_path.
n_condition <- sub("_r[0-9]+$", "", n_path)
stopifnot(setequal(n_condition, n_time_point))
nfpkm2nd_mat <- as.matrix(nfpkm2nd)
nData2ndfpkm <- do.call(cbind, lapply(n_time_point, function(tp) {
  apply(nfpkm2nd_mat[, n_condition == tp, drop = FALSE], 1, mean)
}))
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)
# Back to log2(FPKM + 1) after averaging.
nData <- log2(nData2ndfpkm + 1)
############ 2nd primed RNAseq
pData2ndfpkm <- read.table("../data/paper.primed.fpkm.txt", header = TRUE, row.names = 1)
pData <- log2(pData2ndfpkm + 1)

############ Normalize
library(edgeR)
# Genes present in both the naive and the primed table.
genes <- intersect(row.names(nData), row.names(pData))
paperpath <- c("hiFT","d2","d5","d8","d14","d20","d24p","d24m","hiPST")
# Naive replicate columns followed by the primed columns, shared genes only.
all_data <- cbind(logfpkm2nd[genes, n_path], pData[genes, paperpath])
# Batch 1 = naive replicate columns, batch 2 = primed columns.
batch <- as.factor(c(rep(1, length(n_path)), rep(2, length(paperpath))))
rmbatch_data <- removeBatchEffect(all_data, batch = batch)
# Clamp values that ended up below zero after the correction.
rmbatch_data[rmbatch_data < 0] <- 0

# Replicate column names of each naive time point, in n_time_point order.
# Grouping by the "_r<k>" suffix replaces hand-maintained column ranges.
n_rep_cols <- split(n_path, factor(sub("_r[0-9]+$", "", n_path), levels = n_time_point))
# Apply `stat` (mean, sd, ...) per gene across the replicates of each time
# point of the batch-corrected matrix; returns a genes x time-points matrix.
summarise_n_reps <- function(stat) {
  do.call(cbind, lapply(n_rep_cols, function(rep_cols) {
    apply(rmbatch_data[, rep_cols, drop = FALSE], 1, stat)
  }))
}

nData <- summarise_n_reps(mean)
colnames(nData) <- n_time_point
rownames(nData) <- genes
pData <- rmbatch_data[genes, paperpath]

common_time_point <- c("hiF-T","2d","6d","8d","14d","20d","24d+dox","24d-dox","iPSC-T")

n_log_sd <- summarise_n_reps(sd)
colnames(n_log_sd) <- n_time_point
rownames(n_log_sd) <- rownames(rmbatch_data)
# sd() is never negative, so this is only a defensive guard.
n_log_sd[n_log_sd < 0] <- 0

# Keep only the marker symbols found in both matrices. The result follows the
# row order of nData, not the order in which the symbols are listed.
shared_markers <- function(symbols) {
  intersect(intersect(row.names(nData), symbols), row.names(pData))
}

early_down <- shared_markers(c("CAV2","THY1"))
late_down <- shared_markers(c("SHOX2","CDH2"))
# late_down <- intersect(row.names(nData),as.vector(read.table("/Users/chengchenzhao/OneDrive/Projects/naiveiPS/submission/eLife/1st_revision/Figures/Fig2/Cluster/14cluster_11_gene.txt")[,1]))
early_up <- shared_markers(c("CDH1","NANOG"))
late_up <- shared_markers(c("DPPA3","TFCP2L1"))
transiet_up_early <- shared_markers(c("LHX9","HOXD10"))
transient_up_middle <- shared_markers(c("IGF2","AFP"))
transient_up_late <- shared_markers(c("CGB5","OVOL1"))

############################################################################
####################        typical gene pattern       #####################
############################################################################
selected_cluster <- c(10,11,4,5,8,6,2)
cccol <- c("#CE0013","#FA8072","#32CD32","#7FFFD4","#3A5FCD","#004138","#00CED1","#190246","#EEEE00")
# Palette reordered so that entry i is the colour of the i-th pattern below.
clusterCol <- cccol[c(3,4,1,2,5,6,7)]

# One PDF per expression pattern, named "SFig2F_<pattern>.pdf". The list names
# are the file-name stems, so they must stay exactly as spelled here.
pattern_genes <- list(
  early_down = early_down,
  late_down = late_down,
  early_up = early_up,
  late_up = late_up,
  transiet_up_early = transiet_up_early,
  transient_up_middle = transient_up_middle,
  transient_up_late = transient_up_late
)
for (i in seq_along(pattern_genes)) {
  pdf(paste0("SFig2F_", names(pattern_genes)[i], ".pdf"), width = 3, height = 3.2)
  # `finally` closes the device even when plotting fails, so an error does not
  # leave a half-written PDF device open for the following plots.
  tryCatch(
    NaiveGeneBarplot(pattern_genes[[i]], clusterCol[i]),
    finally = dev.off()
  )
}


# km$cluster[early_up]
# km$cluster[late_up]
# km$cluster[early_down]
# km$cluster[late_down]
# km$cluster[transiet_up_early]
# km$cluster[transient_up_middle]
# km$cluster[transient_up_late]